* ---------------------------------------------------------------------------
* Session setup and directory globals.
* Every directory is derived from one root so the package can be relocated
* by editing a single line. The values of dodir, tempdir, paneltemp,
* ipumsdir, ipums100dir and histdir are exactly what they were before.
* ---------------------------------------------------------------------------
clear
set more off

* Root of the replication package (keep the trailing backslash)
global rootdir "C:\Users\emily.weber\Dropbox\intgmob_AssortativeMating\ReplicationPackage\"
cd "${rootdir}"

cap log close

* Location of the do-files
global dodir "${rootdir}dofiles\"

* Intermediate output (panel pieces are written to paneltemp)
global tempdir "${rootdir}intermediatedata\"
global paneltemp "${tempdir}panel\"

* Raw inputs: IPUMS 1% samples, IPUMS 100% files, ICPSR historical files
global ipumsdir "${rootdir}rawdata\census_1pct\"
global ipums100dir "${rootdir}rawdata\census_100pct\"
global histdir "${rootdir}rawdata\icpsr\"

*************************************************
*Urbanization
*************************************************

* Stack the ten decennial ICPSR files (1850-1940), tag each appended file
* with its census year in year2, and copy that file's urb### count into a
* common variable called urban.
use "$histdir/ICPSR_1850_stct.dta", clear
generate year2 = 1850
generate urban = urb850

forvalues census = 1860(10)1940 {
	* the urban-count variables drop the leading 1 of the year (1860 -> urb860)
	local sfx = `census' - 1000
	append using "$histdir/ICPSR_`census'_stct.dta"
	replace year2 = `census' if year2 == .
	replace urban = urb`sfx' if year2 == `census'
}

* year2 takes the place of the year variable carried by the source files
drop year
rename year2 year

* keep level 2 records only
* NOTE(review): presumably the state-level rows - confirm against the ICPSR codebook
drop if level != 2

* 1860: rebuild the male total from the male age-group counts
replace mtot = m0 + m14 + m59 + m1014 + m1519 + m2029 + m3039 + m4049 + m5059 + m6069 + m7079 + m8089 + m9099 + m100 if year == 1860

* rescale the identifier, then express counts as shares of total population
replace fips = fips/1000
generate pctmale = mtot/totpop
generate pcturb25 = urb25/totpop
generate pcturban = urban/totpop

keep year fips name totpop pctmale pcturb25 pcturban
drop if fips == .

save "$paneltemp/pcturb.dta", replace


*************************************************
*Farm value
*************************************************

* Builds farmval.dta: one row per year and fips with total farm value
* (farmval) and value per farm (valperfarm). Each step loads one census
* year, keeps level 2 records, divides fips by 1000, drops rows with missing
* fips, appends the file built so far and saves it again.
* NOTE(review): the sleep 1000 before each save pauses one second,
* presumably to let a file-sync client release the file - confirm.

* ---- 1850: this save starts farmval.dta ----
use $histdir/ICPSR_1850_stct.dta, clear
gen year = 1850
keep if level == 2
replace fips = fips/1000
gen valperfarm = farmval / farms
keep year fips name farmval valperfarm
drop if fips == .
sleep 1000
save $paneltemp/farmval.dta, replace

* ---- 1860: the number of farms is derived rather than read directly ----
* farms = row total of every variable matching farm* minus farmval
* (rowtotal counts missing values as zero).
* NOTE(review): assumes the remaining farm* variables are farm counts - confirm
use $histdir/ICPSR_1860_stct.dta, clear
gen year = 1860
keep if level == 2
replace fips = fips/1000
egen farmshold = rowtotal(farm*)
gen farms = farmshold - farmval
gen valperfarm = farmval / farms
keep year fips name farmval valperfarm
drop if fips == .
append using $paneltemp/farmval.dta
sleep 1000
save $paneltemp/farmval.dta, replace

* ---- 1870-1900, 1920, 1940: farmval and farms are read as stored ----
* capture on gen year swallows the error raised when year already exists
* in a file, so the stored year is then kept as is.
foreach x in 1870 1880 1890 1900 1920 1940 {
use $histdir/ICPSR_`x'_stct.dta, clear
capture gen year = `x'
keep if level == 2
replace fips = fips/1000
gen valperfarm = farmval / farms
keep year fips name farmval valperfarm
drop if fips == .
append using $paneltemp/farmval.dta
sleep 1000
save $paneltemp/farmval.dta, replace
}

* ---- 1910: farmval is the sum of three component value variables ----
use $histdir/ICPSR_1910_stct.dta, clear
gen year = 1910
keep if level == 2
replace fips = fips/1000
gen farmval = favalown + favalten + favalman
gen valperfarm = farmval / farms
keep year fips name farmval valperfarm
drop if fips == .
append using $paneltemp/farmval.dta
sleep 1000
save $paneltemp/farmval.dta, replace

* ---- 1930: farmval is the sum of four component value variables ----
* The finished file is sorted by year and fips before the last save.
use $histdir/ICPSR_1930_stct.dta, clear
gen year = 1930
keep if level == 2
replace fips = fips/1000
gen farmval = favafown + favapown + favalman + favalten
gen valperfarm = farmval / farms
keep year fips name farmval valperfarm
drop if fips == .
append using $paneltemp/farmval.dta
sort year fips
sleep 1000
save $paneltemp/farmval.dta, replace

*************************************************
*Dwellings and Density
*************************************************

* Builds dwellings.dta: dwell, persons per dwelling (popperdwell) and
* families per dwelling (famperdwell) by year and fips. Each step appends
* the file built so far and saves it again.

* ---- 1850: this save starts dwellings.dta ----
use $histdir/ICPSR_1850_stct.dta, clear
gen year = 1850
keep if level == 2
replace fips = fips/1000
gen popperdwell = totpop / dwell
gen famperdwell = families / dwell
keep year fips name dwell popperdwell famperdwell
drop if fips == .
sleep 1000
save $paneltemp/dwellings.dta, replace

* ---- 1860: population and families from the stct file, rest from ICPSR_1860_st ----
use $histdir/ICPSR_1860_stct.dta, clear
gen year = 1860
keep if level == 2
replace fips = fips/1000
keep year state fips totpop families

* level, name and dwell were dropped by the keep above, so the values used
* below are the ones supplied by the merged file
merge 1:1 state fips using $histdir/ICPSR_1860_st.dta

keep if level == 2
gen popperdwell = totpop / dwell
gen famperdwell = families / dwell
keep year fips name dwell popperdwell famperdwell
drop if fips == .
append using $paneltemp/dwellings.dta
sleep 1000
save $paneltemp/dwellings.dta, replace

* ---- 1870: read from ICPSR_1870_st_3; fips is used as stored (not divided by 1000) ----
use $histdir/ICPSR_1870_st_3.dta, clear
gen year = 1870
keep if level == 2
gen popperdwell = totpop / dwell
gen famperdwell = families / dwell
keep year fips name dwell popperdwell famperdwell
drop if fips == .
append using $paneltemp/dwellings.dta
sleep 1000
save $paneltemp/dwellings.dta, replace

* ---- 1880: year is taken from the file; only totpop is kept from the stct file ----
use $histdir/ICPSR_1880_stct.dta, clear
keep if level == 2
replace fips = fips/1000
keep year state fips totpop 

* level, name, dwell and families come from ICPSR_1880_st_2
merge 1:1 state fips using $histdir/ICPSR_1880_st_2.dta

keep if level == 2
gen popperdwell = totpop / dwell
gen famperdwell = families / dwell
keep year fips name dwell popperdwell famperdwell
drop if fips == .
append using $paneltemp/dwellings.dta
sleep 1000
save $paneltemp/dwellings.dta, replace

* ---- 1890-1920: all inputs are in the stct file ----
* capture on gen year swallows the error raised when year already exists
foreach x in 1890 1900 1910 1920{
use $histdir/ICPSR_`x'_stct.dta, clear
capture gen year = `x'
keep if level == 2
replace fips = fips/1000
gen popperdwell = totpop / dwell
gen famperdwell = families / dwell
keep year fips name dwell popperdwell famperdwell
drop if fips == .
append using $paneltemp/dwellings.dta
sleep 1000
save $paneltemp/dwellings.dta, replace
}

* ---- 1930: dwelltot is renamed to dwell and famtot is the family count ----
use $histdir/ICPSR_1930_stct_4.dta, clear
gen year = 1930
keep if level == 2
replace fips = fips/1000
rename dwelltot dwell
gen popperdwell = totpop / dwell
gen famperdwell = famtot / dwell
keep year fips name dwell popperdwell famperdwell
drop if fips == .
append using $paneltemp/dwellings.dta
sleep 1000
save $paneltemp/dwellings.dta, replace

* ---- 1940: no famperdwell is built, so it is missing for 1940 after the append ----
use $histdir/ICPSR_1940_stct.dta, clear
gen year = 1940
keep if level == 2
replace fips = fips/1000
gen popperdwell = totpop / dwell
keep year fips name dwell popperdwell
drop if fips == .
append using $paneltemp/dwellings.dta
sleep 1000
save $paneltemp/dwellings.dta, replace


*************************************************
*Sex Ratio
*************************************************

***5-20

* White male share among ages 5-19, 1850 and 1860.
* The 1850 step creates pctmale_wtyouth.dta; the 1860 step appends to it.

* 1850: the denominator is the published total wpop519
use "$histdir/ICPSR_1850_stct.dta", clear
drop if level != 2
generate year = 1850
generate pctmale_wtyouth = (wm59 + wm1014 + wm1519) / wpop519
keep year fips pctmale_wtyouth
replace fips = fips/1000
sleep 1000
save "$paneltemp/pctmale_wtyouth.dta", replace

* 1860: the denominator is summed from the male and female age groups
use "$histdir/ICPSR_1860_stct.dta", clear
drop if level != 2
generate year = 1860
generate pctmale_wtyouth = (wm59 + wm1014 + wm1519) / (wm59 + wm1014 + wm1519 + wf59 + wf1014 + wf1519)
keep year fips pctmale_wtyouth
replace fips = fips/1000
append using "$paneltemp/pctmale_wtyouth.dta"
sleep 1000
save "$paneltemp/pctmale_wtyouth.dta", replace

* ---- 1870 and 1880: white youth male share from the st_3 and st_2 files ----
* fips is used as stored in these files (it is not divided by 1000).
* Both years are restricted to level 2 records, matching the other steps
* that load these two files directly. Without the filter, records of any
* other level would be appended to pctmale_wtyouth.dta and could produce
* duplicate year and fips keys in the later 1:1 merge.
* NOTE(review): the variable names suggest ages 5-18 (1870) and 5-17 (1880)
* rather than 5-19 - confirm against the codebook.

* 1870
use $histdir/ICPSR_1870_st_3.dta, clear
gen year = 1870
gen pctmale_wtyouth = (mwh518)/(mwh518 + fwh518)
keep if level == 2
keep year fips pctmale_wtyouth
append using $paneltemp/pctmale_wtyouth.dta
sleep 1000
save $paneltemp/pctmale_wtyouth.dta, replace

* 1880: the numerator and denominator add the mnw/mfw and fnw/ffw counts
use $histdir/ICPSR_1880_st_2.dta, clear
gen year = 1880
gen pctmale_wtyouth = (mnw517 + mfw517) / (mnw517 + mfw517 + fnw517 + ffw517)
keep if level == 2
keep year fips pctmale_wtyouth
append using $paneltemp/pctmale_wtyouth.dta
sleep 1000
save $paneltemp/pctmale_wtyouth.dta, replace

* White youth male share, 1890-1940, appended to pctmale_wtyouth.dta.

* 1890 and 1900: year is already in the files; ages 5-20 by nativity
foreach yr in 1890 1900 {
	use "$histdir/ICPSR_`yr'_stct.dta", clear
	generate pctmale_wtyouth = (nbwm520 + fbwm520) / (nbwm520 + fbwm520 + nbwf520 + fbwf520)
	drop if level != 2
	keep year fips pctmale_wtyouth
	replace fips = fips/1000
	append using "$paneltemp/pctmale_wtyouth.dta"
	sleep 1000
	save "$paneltemp/pctmale_wtyouth.dta", replace
}

* 1930 and 1940: read from the stct_2 files, ages 5-19 in three bands
foreach yr in 1930 1940 {
	use "$histdir/ICPSR_`yr'_stct_2.dta", clear
	generate year = `yr'
	generate pctmale_wtyouth = (wm5_9 + wm1014 + wm1519) / (wm5_9 + wm1014 + wm1519 + wf5_9 + wf1014 + wf1519)
	drop if level != 2
	keep year fips pctmale_wtyouth
	replace fips = fips/1000
	append using "$paneltemp/pctmale_wtyouth.dta"
	* on the last year, purge rows with missing fips from the whole stack
	if `yr' == 1940 {
		drop if fips == .
	}
	sleep 1000
	save "$paneltemp/pctmale_wtyouth.dta", replace
}


*************************************************
*Scholarization
*************************************************		
	
***Get as much from published data as possible

* Builds pctsch_pub.dta: pupils or school attendance as a share of the
* school-age population, from published counts, for 1850-1910. Each step
* appends the file built so far and saves it again.
* Only the 1860 and 1880 steps drop rows with missing fips, and they do so
* before the append, so rows with missing fips from other years stay in
* the file.

* ---- 1850: white plus free colored, ages 5-19; this save starts the file ----
use $histdir/ICPSR_1850_stct.dta, clear
gen year = 1850
gen pctsch_pub = (wtotsch + fctotsch) / (wm59 + wm1014 + wm1519 + wf59 + wf1014 + wf1519 + fcm59 + fcm1014 + fcm1519 + fcf59 + fcf1014 + fcf1519)
keep if level == 2
keep year fips pctsch_pub
replace fips = fips/1000
sleep 1000
save $paneltemp/pctsch_pub.dta, replace
	
* ---- 1860: population from the stct file, school counts from ICPSR_1860_st ----
use $histdir/ICPSR_1860_stct.dta, clear
gen year = 1860
gen pop519 = (wm59 + wm1014 + wm1519 + wf59 + wf1014 + wf1519 + fcm59 + fcm1014 + fcm1519 + fcf59 + fcf1014 + fcf1519)
keep if level == 2
keep year fips pop519
replace fips = fips/1000
merge 1:1 fips using $histdir/ICPSR_1860_st.dta, nogen keepusing(wtotsch fctotsch)
gen pctsch_pub = (wtotsch + fctotsch) / pop519
drop if fips == .
keep year fips pctsch_pub
append using $paneltemp/pctsch_pub.dta
sleep 1000
save $paneltemp/pctsch_pub.dta, replace
	
* ---- 1870: pupils over m518 + f518; fips is used as stored ----
use $histdir/ICPSR_1870_st_3.dta, clear
gen year = 1870
gen pctsch_pub = pupils / (m518 + f518)
keep if level == 2
keep year fips pctsch_pub	
append using $paneltemp/pctsch_pub.dta
sleep 1000
save $paneltemp/pctsch_pub.dta, replace

* ---- 1880: population from ICPSR_1880_st_2, pupils from ICPSR_1880_st_1 ----
use $histdir/ICPSR_1880_st_2.dta, clear
gen year = 1880
gen pop517 = (mnw517 + fnw517 + mfw517 + ffw517 + mcol517 + fcol517)
keep if level == 2
keep year fips pop517
merge 1:1 fips using $histdir/ICPSR_1880_st_1.dta, nogen keepusing(pupils)
gen pctsch_pub = pupils / pop517
drop if fips == .
keep year fips pctsch_pub
append using $paneltemp/pctsch_pub.dta
sleep 1000
save $paneltemp/pctsch_pub.dta, replace	
	
* ---- 1890: year is already in the file; four pupil counts over six 5-20 counts ----
use $histdir/ICPSR_1890_stct.dta, clear
gen pctsch_pub = (cswmpup + cscompup + cswfpup + cscofpup) / (nbwm520 + fbwm520 + colm520 + nbwf520 + fbwf520 + colf520)
keep if level == 2
keep year fips pctsch_pub
replace fips = fips/1000
append using $paneltemp/pctsch_pub.dta
sleep 1000
save $paneltemp/pctsch_pub.dta, replace
	
* ---- 1910: tsch counts over the matching tot counts, four age bands 6-20 ----
use $histdir/ICPSR_1910_stct.dta, clear
gen year = 1910
gen pctsch_pub = (tsch69 + tsch1014 + tsch1517 + tsch1820) / (t69tot + t1014tot + t1517tot + t1820tot)
keep if level == 2
keep year fips pctsch_pub
replace fips = fips/1000
append using $paneltemp/pctsch_pub.dta
sleep 1000
save $paneltemp/pctsch_pub.dta, replace
	
	
* ---- 1920-1940: tsch counts over the matching tot counts, ages 7-20 ----
* Each pass appends the file built so far and then drops every row with a
* missing fips. The earlier steps that build this file only drop such rows
* before appending, if at all, so without this drop the rows would stay in
* pctsch_pub.dta and could produce duplicate year and fips keys in the
* later 1:1 merge. The other panel files are all purged of missing fips.
foreach y in 1920 1930 1940{	
use $histdir/ICPSR_`y'_stct.dta, clear
gen year = `y'
gen pctsch_pub = (tsch713 + tsch1415 + tsch1617 + tsch1820) / (t713tot + t1415tot + t1617tot + t1820tot)
keep if level == 2
keep year fips pctsch_pub
replace fips = fips/1000
append using $paneltemp/pctsch_pub.dta
drop if fips == .
sleep 1000
save $paneltemp/pctsch_pub.dta, replace
}

*************************************************
*Industry Composition
*************************************************

***Farmers, using published data
		
* Builds pctfarm_fam.dta: farms divided by families, by year and fips.
* No step is present for 1870 or 1940.

* ---- 1860: families from the stct file, farms from ICPSR_1860_st; starts the file ----
use $histdir/ICPSR_1860_stct.dta, clear
gen year = 1860
keep if level == 2
keep year fips families
replace fips = fips/1000
merge 1:1 fips using $histdir/ICPSR_1860_st.dta, nogen keepusing(farms)
gen pctfarm_fam = farms/families
drop if fips == .
keep year fips pctfarm_fam
sleep 1000
save $paneltemp/pctfarm_fam.dta, replace		

* ---- 1880: farms from the stct file, families from ICPSR_1880_st_2 ----
* year is already in the file
use $histdir/ICPSR_1880_stct.dta, clear
keep if level == 2
keep year fips farms
replace fips = fips/1000
merge 1:1 fips using $histdir/ICPSR_1880_st_2.dta, nogen keepusing(families)
gen pctfarm_fam = farms/families
drop if fips == .
keep year fips pctfarm_fam
append using $paneltemp/pctfarm_fam.dta
sleep 1000
save $paneltemp/pctfarm_fam.dta, replace		

* ---- 1930: farms from the stct file, famtot and level from stct_4 ----
* The merge runs on state and county before any level filter, so level is
* taken from the merged file and applied afterwards.
use $histdir/ICPSR_1930_stct.dta, clear
gen year = 1930
keep year fips farms state county
merge 1:1 state county using $histdir/ICPSR_1930_stct_4.dta, nogen keepusing(famtot level)
gen pctfarm_fam = farms/famtot
replace fips = fips/1000
drop if fips == .
keep if level == 2
keep year fips pctfarm_fam
append using $paneltemp/pctfarm_fam.dta
sleep 1000
save $paneltemp/pctfarm_fam.dta, replace		
		
* ---- 1850, 1910, 1920: both counts are in the stct file; year is created ----
foreach y in 1850 1910 1920{		
use $histdir/ICPSR_`y'_stct.dta, clear
gen year = `y'
gen pctfarm_fam = farms/families
drop if fips == .
keep if level == 2
keep year fips pctfarm_fam
replace fips = fips/1000
append using $paneltemp/pctfarm_fam.dta
sleep 1000
save $paneltemp/pctfarm_fam.dta, replace
}
	
* ---- 1890, 1900: same as above, but year is already in the files ----
foreach y in 1890 1900 {
use $histdir/ICPSR_`y'_stct.dta, clear
gen pctfarm_fam = farms/families
drop if fips == .
keep if level == 2
keep year fips pctfarm_fam
replace fips = fips/1000
append using $paneltemp/pctfarm_fam.dta
sleep 1000
save $paneltemp/pctfarm_fam.dta, replace
}


***Manufacturing, using published data

* Manufacturing labor shares from published counts, 1850-1900.
* pctmfglab_pub is hands over total population; the m and f versions are
* over the male and female totals. 1860 has only the overall share.

* 1850: population from the stct file, labor counts from ICPSR_1850_st
use "$histdir/ICPSR_1850_stct.dta", clear
drop if level != 2
generate year = 1850
keep year fips totpop mtot ftot
replace fips = fips/1000
merge 1:1 fips using "$histdir/ICPSR_1850_st.dta", nogen keepusing(mfglabm mfglabm2 mfglabf mfglabf2)
generate pctmfglab_pub = (mfglabm + mfglabf) / totpop
generate pctmfglabm_pub = mfglabm2 / mtot
generate pctmfglabf_pub = mfglabf2 / ftot
keep year fips pctmfglab_pub pctmfglabm_pub pctmfglabf_pub
drop if fips == .
sleep 1000
save "$paneltemp/pctmfglab_pub.dta", replace

* 1860: overall share only
use "$histdir/ICPSR_1860_stct.dta", clear
generate year = 1860
generate pctmfglab_pub = (mfglabm + mfglabf) / totpop
drop if level != 2
keep year fips pctmfglab_pub
replace fips = fips/1000
drop if fips == .
append using "$paneltemp/pctmfglab_pub.dta"
sleep 1000
save "$paneltemp/pctmfglab_pub.dta", replace

* 1870: the female count is mfglbf16
use "$histdir/ICPSR_1870_stct.dta", clear
generate year = 1870
generate pctmfglab_pub = (mfglbm16 + mfglbf16 + mfglbch) / totpop
generate pctmfglabm_pub = mfglbm16 / mtot
generate pctmfglabf_pub = mfglbf16 / ftot
drop if level != 2
keep year fips pctmfglab_pub pctmfglabm_pub pctmfglabf_pub
replace fips = fips/1000
drop if fips == .
append using "$paneltemp/pctmfglab_pub.dta"
sleep 1000
save "$paneltemp/pctmfglab_pub.dta", replace

* 1880 and 1890: the female count is mfglbf15; year is already in the files
foreach yr in 1880 1890 {
	use "$histdir/ICPSR_`yr'_stct.dta", clear
	generate pctmfglab_pub = (mfglbm16 + mfglbf15 + mfglbch) / totpop
	generate pctmfglabm_pub = mfglbm16 / mtot
	generate pctmfglabf_pub = mfglbf15 / ftot
	drop if level != 2
	keep year fips pctmfglab_pub pctmfglabm_pub pctmfglabf_pub
	replace fips = fips/1000
	drop if fips == .
	append using "$paneltemp/pctmfglab_pub.dta"
	sleep 1000
	save "$paneltemp/pctmfglab_pub.dta", replace
}

* 1900: the female count is mfglbf16 again; year is already in the file
use "$histdir/ICPSR_1900_stct.dta", clear
generate pctmfglab_pub = (mfglbm16 + mfglbf16 + mfglbch) / totpop
generate pctmfglabm_pub = mfglbm16 / mtot
generate pctmfglabf_pub = mfglbf16 / ftot
drop if level != 2
keep year fips pctmfglab_pub pctmfglabm_pub pctmfglabf_pub
replace fips = fips/1000
drop if fips == .
append using "$paneltemp/pctmfglab_pub.dta"
sleep 1000
save "$paneltemp/pctmfglab_pub.dta", replace


*************************************************
*Immigration
*************************************************

* Builds pctfor.dta for 1850-1900:
*   pctfor     foreign-born share of total population
*   pctforwt   foreign-born share of the white population
*   pctforwtm  same, white males
*   pctforwtf  same, white females
* Each step appends the file built so far and saves it again.

* ---- 1850: totals from the stct file, foreign-born counts from ICPSR_1850_st ----
use $histdir/ICPSR_1850_stct.dta, clear
keep if level == 2
gen year = 1850
keep year fips totpop wmtot wftot
replace fips = fips/1000
merge 1:1 fips using $histdir/ICPSR_1850_st.dta, nogen keepusing(forpop fbwf fbwm fbwt)
gen pctfor = forpop / totpop
gen pctforwt = fbwt / (wmtot + wftot)
gen pctforwtm = fbwm / wmtot
gen pctforwtf = fbwf / wftot
keep year fips pctfor pctforwt pctforwtm pctforwtf
drop if fips == .
sleep 1000
save $paneltemp/pctfor.dta, replace

* ---- 1860: white shares from the stct file, forpop from ICPSR_1860_st ----
use $histdir/ICPSR_1860_stct.dta, clear
gen year = 1860
gen pctforwt = (fbwmtot + fbwftot) / (wmtot + wftot)
gen pctforwtm = fbwmtot / wmtot
gen pctforwtf = fbwftot / wftot
replace fips = fips/1000
drop if fips == .
keep if level == 2
keep year fips totpop pctforwt pctforwtm pctforwtf
merge 1:1 fips using $histdir/ICPSR_1860_st.dta, nogen keepusing(forpop)
gen pctfor = forpop /totpop
drop if fips == .
keep year fips pctfor pctforwt pctforwtm pctforwtf
append using $paneltemp/pctfor.dta
sleep 1000
save $paneltemp/pctfor.dta, replace

* ---- 1870: whtotfb comes from ICPSR_1870_st_1; no sex-specific shares ----
use $histdir/ICPSR_1870_stct.dta, clear
gen year = 1870
gen pctfor = fbtot / totpop
replace fips = fips/1000
drop if fips == .
keep if level == 2
keep year fips pctfor whtot
merge 1:1 fips using $histdir/ICPSR_1870_st_1.dta, nogen keepusing(whtotfb)
gen pctforwt = whtotfb / whtot
drop if fips == .
keep year fips pctfor pctforwt
append using $paneltemp/pctfor.dta
sleep 1000
save $paneltemp/pctfor.dta, replace

* ---- 1880: white counts by sex and nativity come from ICPSR_1880_st_2 ----
* year is already in the stct file
use $histdir/ICPSR_1880_stct.dta, clear
gen pctfor = fbtot / totpop
replace fips = fips/1000
drop if fips == .
keep if level == 2
keep year fips pctfor
merge 1:1 fips using $histdir/ICPSR_1880_st_2.dta, nogen keepusing(ffbwh fnbwh mfbwh mnbwh)
gen pctforwt = (ffbwh + mfbwh) / (ffbwh + fnbwh + mfbwh + mnbwh)
gen pctforwtm = (mfbwh) / (mfbwh + mnbwh)
gen pctforwtf = (ffbwh) / (ffbwh + fnbwh)
drop if fips == .
keep year fips pctfor pctforwt pctforwtm pctforwtf
append using $paneltemp/pctfor.dta
sleep 1000
save $paneltemp/pctfor.dta, replace

* ---- 1890, 1900: everything is in the stct file; year is already present ----
foreach y in 1890 1900{
use $histdir/ICPSR_`y'_stct.dta, clear
gen pctfor = (fbmtot + fbftot) / totpop
gen pctforwt = (fbwmtot + fbwftot) / (wmtot + wftot)
gen pctforwtm = fbwmtot / wmtot
gen pctforwtf = fbwftot / wftot
drop if fips == .
keep if level == 2
keep year fips pctfor pctforwt pctforwtm pctforwtf
replace fips = fips/1000
append using $paneltemp/pctfor.dta
sleep 1000
save $paneltemp/pctfor.dta, replace
}

* ---- 1910: only the overall white foreign-born share is built ----
* pctfor, pctforwtm and pctforwtf are not constructed for this year (their
* generate lines were already disabled), so they are missing for 1910 once
* the file is appended to pctfor.dta.
use $histdir/ICPSR_1910_stct.dta, clear
gen year = 1910
*gen pctfor = (fbmtot + fbftot) / totpop
gen pctforwt = fbwtot / whtot
*gen pctforwtm = fbwmtot / wmtot
*gen pctforwtf = fbwftot / wftot
drop if fips == .
keep if level == 2
* pctfor is not created above, so it must not be listed here: keep aborts
* with "variable pctfor not found" when a listed variable does not exist
keep year fips pctforwt
replace fips = fips/1000
append using $paneltemp/pctfor.dta
sleep 1000
save $paneltemp/pctfor.dta, replace

* 1920 and 1930: white foreign-born shares, overall and by sex.
* The overall white share divides by whtot; the sex-specific shares divide
* by wmtot and wftot. pctfor is not built for these years, so it is
* missing for them after the append.
foreach yr in 1920 1930 {
	use "$histdir/ICPSR_`yr'_stct.dta", clear
	generate year = `yr'
	generate pctforwt = (fbwmtot + fbwftot) / whtot
	generate pctforwtm = fbwmtot / wmtot
	generate pctforwtf = fbwftot / wftot
	drop if fips == .
	drop if level != 2
	keep year fips pctforwt pctforwtm pctforwtf
	replace fips = fips/1000
	append using "$paneltemp/pctfor.dta"
	sleep 1000
	save "$paneltemp/pctfor.dta", replace
}

* ---- 1940: pctfor from the stct file, white counts from stct_2 ----
* The two files are matched on level, state and county before fips is
* rescaled and before the level 2 filter is applied.
use $histdir/ICPSR_1940_stct.dta, clear
gen year = 1940
gen pctfor = (fbftot + fbmtot) / totpop
keep year fips pctfor state county level
merge 1:1 level state county using $histdir/ICPSR_1940_stct_2.dta, nogen keepusing(fbwmtot fbwftot wmtot wftot)
gen pctforwt = (fbwmtot + fbwftot) / (wmtot + wftot)
gen pctforwtm = fbwmtot / wmtot
gen pctforwtf = fbwftot / wftot
replace fips = fips/1000
drop if fips == .
keep if level == 2
keep year fips pctfor pctforwt pctforwtm pctforwtf
append using $paneltemp/pctfor.dta
sleep 1000
save $paneltemp/pctfor.dta, replace

*Ages 5-20

* Foreign-born share among white youth, overall and by sex, written to
* pctforyouth.dta. The 1890 pass creates the file; later passes append.

* 1890 and 1900: ages 5-20, counts split by nativity; year is in the files
foreach yr in 1890 1900 {
	use "$histdir/ICPSR_`yr'_stct.dta", clear
	generate pctforyouth_pub = (fbwm520 + fbwf520) / (fbwm520 + nbwm520 + fbwf520 + nbwf520)
	generate pctforyouthm_pub = fbwm520 / (fbwm520 + nbwm520)
	generate pctforyouthf_pub = fbwf520 / (fbwf520 + nbwf520)
	drop if fips == .
	drop if level != 2
	keep year fips pctforyouth_pub pctforyouthm_pub pctforyouthf_pub
	replace fips = fips/1000
	* nothing to append on the first pass
	if `yr' == 1900 {
		append using "$paneltemp/pctforyouth.dta"
	}
	sleep 1000
	save "$paneltemp/pctforyouth.dta", replace
}

* 1930 and 1940: ages 5-19 in three bands, read from the stct_2 files
foreach yr in 1930 1940 {
	use "$histdir/ICPSR_`yr'_stct_2.dta", clear
	generate year = `yr'
	generate pctforyouth_pub = (fbwm5_9 + fbwm1014 + fbwm1519 + fbwf5_9 + fbwf1014 + fbwf1519) / (wm5_9 + wm1014 + wm1519 + wf5_9 + wf1014 + wf1519)
	generate pctforyouthm_pub = (fbwm5_9 + fbwm1014 + fbwm1519) / (wm5_9 + wm1014 + wm1519)
	generate pctforyouthf_pub = (fbwf5_9 + fbwf1014 + fbwf1519) / (wf5_9 + wf1014 + wf1519)
	drop if fips == .
	drop if level != 2
	keep year fips pctforyouth_pub pctforyouthm_pub pctforyouthf_pub
	replace fips = fips/1000
	append using "$paneltemp/pctforyouth.dta"
	sleep 1000
	save "$paneltemp/pctforyouth.dta", replace
}
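* (leftover block-comment terminator removed here; no matching opener exists above, so every section above runs)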

*************************************************
*Land Inequality
*************************************************

* Runs the farm-size Gini do-file. The path is relative, so it resolves
* against the working directory set by the cd at the top of this file.
* NOTE(review): presumably this writes ginifarmsize.dta, which the final
* merge section reads from the panel folder - confirm in that do-file.
do dofiles\01-clean_data\10-farmsizegini.do

*************************************************
*Railroads
*************************************************

* Railroad coverage by state, 1850-1880: the rr15 and rr40 measures are
* summed within statefip and divided by the summed sqmi variable.
* The 1850 pass creates pctrr.dta; later passes append to it.
foreach yy in 50 60 70 80 {
	use "$histdir/RR_18`yy'.dta", clear
	collapse (sum) sqmi`yy' rr15_18`yy' rr40_18`yy', by(statefip)
	rename statefip fips
	generate pctrr15 = rr15_18`yy'/sqmi`yy'
	generate pctrr40 = rr40_18`yy'/sqmi`yy'
	keep fips pctrr15 pctrr40
	generate year = 18`yy'
	* nothing to append on the first pass
	if `yy' != 50 {
		append using "$paneltemp/pctrr.dta"
	}
	sleep 1000
	save "$paneltemp/pctrr.dta", replace
}


*************************************************
*Using IPUMS data
*************************************************

* State-by-year means from the IPUMS 1% samples for 1860 and 1870.
* Each indicator is set only for the people named in its if-condition, so
* the collapse mean is a share within that group. The 1860 pass creates
* ipumsvars.dta; the 1870 pass appends to it. Never-married status is
* proxied here by sploc == 0.
foreach yr in 1860 1870 {
	use "$ipumsdir/`yr'_1%.dta", clear
	keep year age sex race statefip school relate occ1950 ind1950 bpl sploc occscore
	rename statefip fips

	* recurring sample restrictions and category ranges
	local youth "age >= 5 & age <= 20 & race == 1"
	local a2035 "race == 1 & age >= 20 & age <= 35"
	local a3045 "race == 1 & age >= 30 & age <= 45"
	local mfg "ind1950 >= 300 & ind1950 <= 499"
	local clersal "occ1950 >= 300 & occ1950 <= 490"

	* sex ratios: abs(sex-2) is 1 when sex is 1 and 0 when sex is 2
	generate pctmale_wt1530 = abs(sex-2) if age >= 15 & age <= 30 & race == 1
	generate pctmale_wt3145 = abs(sex-2) if age >= 31 & age <= 45 & race == 1
	generate pctmale_wtyouth_ipums = abs(sex-2) if `youth'

	* school attendance, ages 5-20
	generate pctsch_wt520 = (school == 2) if `youth'
	generate pctsch_wtm520 = (school == 2) if `youth' & sex == 1
	generate pctsch_wtf520 = (school == 2) if `youth' & sex == 2

	* farm, service and manufacturing indicators
	generate pctfarm_hhhd = (occ1950 == 100) if race == 1 & relate == 1 & sex == 1
	generate pctsvclab_ipums = (ind1950 >= 500 & ind1950 <= 899)
	generate pctmfglab_ipums = (`mfg')
	generate pctmfglabf_ipums = (`mfg' & age >= 16) if sex == 2
	generate pctmfglabm_ipums = (`mfg' & age >= 16) if sex == 1

	* foreign born: bpl of 100 or more
	generate pctfor_ipums = (bpl >= 100)
	generate pctforwtm_ipums = (bpl >= 100) if sex == 1 & race == 1
	generate pctforwtf_ipums = (bpl >= 100) if sex == 2 & race == 1
	generate pctforyouth = (bpl >= 100) if `youth'
	generate pctforyouthf = (bpl >= 100) if sex == 2 & `youth'
	generate pctforyouthm = (bpl >= 100) if sex == 1 & `youth'

	* log occupational score of women without a spouse present
	generate logoccscore_nvrmar_2035 = log(occscore + 1) if sex == 2 & `a2035' & sploc == 0
	generate logoccscore_nvrmar_3045 = log(occscore + 1) if sex == 2 & `a3045' & sploc == 0

	* women reporting any occupation (occ1950 below 980)
	generate pctanyocc_f2035 = occ1950 < 980 if sex == 2 & `a2035'
	generate pctanyocc_f3045 = occ1950 < 980 if sex == 2 & `a3045'

	generate pctanyocc_nvrmar_f2035 = occ1950 < 980 if sex == 2 & `a2035' & sploc == 0
	generate pctanyocc_nvrmar_f3045 = occ1950 < 980 if sex == 2 & `a3045' & sploc == 0

	* clerical and sales share among those with an occupation
	generate pctclericsale_2035 = (`clersal') if `a2035' & occ1950 < 980
	generate pctclericsale_m2035 = (`clersal') if sex == 1 & `a2035' & occ1950 < 980
	generate pctclericsale_f2035 = (`clersal') if sex == 2 & `a2035' & occ1950 < 980
	generate pctclericsale_3045 = (`clersal') if `a3045' & occ1950 < 980
	generate pctclericsale_m3045 = (`clersal') if sex == 1 & `a3045' & occ1950 < 980
	generate pctclericsale_f3045 = (`clersal') if sex == 2 & `a3045' & occ1950 < 980

	* teacher share (occ1950 code 093) among those with an occupation
	generate pctteacher_2035 = (occ1950 == 093) if `a2035' & occ1950 < 980
	generate pctteacher_f2035 = (occ1950 == 093) if sex == 2 & `a2035' & occ1950 < 980
	generate pctteacher_3045 = (occ1950 == 093) if `a3045' & occ1950 < 980
	generate pctteacher_f3045 = (occ1950 == 093) if sex == 2 & `a3045' & occ1950 < 980

	collapse (mean) pctmale_wt1530 pctmale_wt3145 pctmale_wtyouth_ipums ///
		pctsch_wt520 pctsch_wtm520 pctsch_wtf520 pctfarm_hhhd ///
		pctfor_ipums pctforwtm_ipums pctforwtf_ipums pctforyouth pctforyouthf pctforyouthm ///
		pctsvclab_ipums pctmfglab_ipums pctmfglabm_ipums pctmfglabf_ipums ///
		logoccscore_nvrmar_2035 logoccscore_nvrmar_3045 ///
		pctanyocc_* pctclericsale_* pctteacher_*, by(fips year)

	* the first pass creates the file; later passes stack onto it
	if `yr' != 1860 {
		append using "$paneltemp/ipumsvars.dta"
		sleep 1000
	}
	save "$paneltemp/ipumsvars.dta", replace
}

* State-by-year means from the IPUMS 100% files. For each year the zip
* archive is extracted in the 100% folder, the same indicators as in the
* 1% section are built and collapsed to fips and year, the result is
* appended to ipumsvars.dta, and the extracted dta file is erased.
* The working directory stays at the 100% folder after this loop.
cd "$ipums100dir"

foreach y in 1850 1880 1900 1910 1920 1930 1940{
	
	unzipfile "`y'_100%_IPUMS.zip", replace
	use "`y'_100%_IPUMS.dta", clear
	
	* 1880: school is blanked, so the schooling shares are missing for 1880
	* here; they are filled from the 1880 1% sample further down
	if `y' == 1880{
		replace school = .
		}
	
	* 1850: marst is created here and set to 6 when sploc is 0
	if `y' == 1850{
		gen marst = .
		replace marst = 6 if sploc == 0
	}
		
	keep year age sex race statefip school relate occ1950 ind1950 bpl marst occscore
	*keep if race == 1
	rename statefip fips
	* sex ratios: abs(sex-2) is 1 when sex is 1 and 0 when sex is 2
	gen pctmale_wt1530 = abs(sex-2) if age >= 15 & age <= 30 & race == 1
	gen pctmale_wt3145 = abs(sex-2) if age >= 31 & age <= 45 & race == 1
	gen pctmale_wtyouth_ipums = abs(sex-2) if age >= 5 & age <= 20 & race == 1
	* school attendance, ages 5-20
	gen pctsch_wt520 = (school == 2) if age >= 5 & age <= 20 & race == 1
	gen pctsch_wtm520 = (school == 2) if age >= 5 & age <= 20 & race == 1 & sex == 1
	gen pctsch_wtf520 = (school == 2) if age >= 5 & age <= 20 & race == 1 & sex == 2
	
	* farm, service and manufacturing indicators
	gen pctfarm_hhhd = (occ1950 == 100) if race == 1 & relate == 1 & sex == 1
	gen pctsvclab_ipums = (ind1950 >= 500 & ind1950 <= 899)
	gen pctmfglab_ipums = (ind1950 >= 300 & ind1950 <= 499)
	gen pctmfglabf_ipums = (ind1950 >= 300 & ind1950 <= 499 & age >= 16) if sex == 2
	gen pctmfglabm_ipums = (ind1950 >= 300 & ind1950 <= 499 & age >= 16) if sex == 1
	
	* foreign born: bpl of 100 or more
	gen pctfor_ipums = (bpl >= 100)
	gen pctforwtm_ipums = (bpl >= 100) if sex == 1 & race == 1
	gen pctforwtf_ipums = (bpl >= 100) if sex == 2 & race == 1
	gen pctforyouth = (bpl >= 100) if age >= 5 & age <= 20 & race == 1
	gen pctforyouthf = (bpl >= 100) if sex == 2 & age >= 5 & age <= 20 & race == 1
	gen pctforyouthm = (bpl >= 100) if sex == 1 & age >= 5 & age <= 20 & race == 1
	
	
	* never-married women are selected with marst == 6 in this section
	gen logoccscore_nvrmar_2035 = log(occscore + 1) if sex == 2 & race == 1 & age >= 20 & age <= 35 & marst == 6
	gen logoccscore_nvrmar_3045 = log(occscore + 1) if sex == 2 & race == 1 & age >= 30 & age <= 45 & marst == 6
	
	* women reporting any occupation (occ1950 below 980)
	gen pctanyocc_f2035 = occ1950 < 980 if sex == 2 & race == 1 & age >= 20 & age <= 35 
	gen pctanyocc_f3045 = occ1950 < 980 if sex == 2 & race == 1 & age >= 30 & age <= 45 
	
	gen pctanyocc_nvrmar_f2035 = occ1950 < 980 if sex == 2 & race == 1 & age >= 20 & age <= 35  & marst == 6
	gen pctanyocc_nvrmar_f3045 = occ1950 < 980 if sex == 2 & race == 1 & age >= 30 & age <= 45  & marst == 6
	
	* clerical and sales share among those with an occupation
	gen pctclericsale_2035 = (occ1950 >= 300 & occ1950 <= 490) if race == 1 & age >= 20 & age <= 35 & occ1950 < 980
	gen pctclericsale_m2035 = (occ1950 >= 300 & occ1950 <= 490) if sex == 1 & race == 1 & age >= 20 & age <= 35 & occ1950 < 980
	gen pctclericsale_f2035 = (occ1950 >= 300 & occ1950 <= 490) if sex == 2 & race == 1 & age >= 20 & age <= 35 & occ1950 < 980
	gen pctclericsale_3045 = (occ1950 >= 300 & occ1950 <= 490) if race == 1 & age >= 30 & age <= 45 & occ1950 < 980
	gen pctclericsale_m3045 = (occ1950 >= 300 & occ1950 <= 490) if sex == 1 & race == 1 & age >= 30 & age <= 45 & occ1950 < 980
	gen pctclericsale_f3045 = (occ1950 >= 300 & occ1950 <= 490) if sex == 2 & race == 1 & age >= 30 & age <= 45 & occ1950 < 980
	
	
	* teacher share (occ1950 code 093) among those with an occupation
	gen pctteacher_2035 = (occ1950 == 093) if race == 1 & age >= 20 & age <= 35 & occ1950 < 980
	gen pctteacher_f2035 = (occ1950 == 093) if sex == 2 & race == 1 & age >= 20 & age <= 35 & occ1950 < 980
	gen pctteacher_3045 = (occ1950 == 093) if race == 1 & age >= 30 & age <= 45 & occ1950 < 980
	gen pctteacher_f3045 = (occ1950 == 093) if sex == 2 & race == 1 & age >= 30 & age <= 45 & occ1950 < 980
	
	* means within fips and year; each indicator is missing outside its group
	collapse (mean) pctmale_wt1530 pctmale_wt3145 pctmale_wtyouth_ipums pctsch_wt520 pctsch_wtm520 pctsch_wtf520 pctfarm_hhhd /*
		*/ pctfor_ipums pctforwtm_ipums pctforwtf_ipums pctforyouth pctforyouthf pctforyouthm /*
		*/ pctsvclab_ipums pctmfglab_ipums pctmfglabm_ipums pctmfglabf_ipums logoccscore_nvrmar_2035 logoccscore_nvrmar_3045 /*
		*/ pctanyocc_* pctclericsale_* pctteacher_*, by(fips year)

* stack onto the file started by the 1% section, then delete the extracted dta
append using $paneltemp/ipumsvars.dta
save $paneltemp/ipumsvars.dta, replace 
erase "$ipums100dir/`y'_100%_IPUMS.dta"
}


* 1880 school attendance by state, from the 1880 1% sample.
* Each indicator is 1 when school == 2 and 0 otherwise, defined only for
* persons aged 5-20 with race == 1 (and, for the _m/_f versions, sex == 1/2);
* it is missing for everyone else, so the collapsed mean is the share with
* school == 2 inside that subsample.
* NOTE(review): school == 2 is presumably the "in school" code and race == 1
* presumably "white" -- confirm against the extract's codebook.
use year age sex race statefip school using $ipumsdir/1880_1%.dta, clear
rename statefip fips

* Shared sample restriction for all three indicators
local schoolage "inrange(age, 5, 20) & race == 1"

gen school_1880 = (school == 2) if `schoolage'

* sex == 1 feeds the _m variable, sex == 2 the _f variable
local sexcode = 0
foreach s in m f {
	local ++sexcode
	gen school_1880_`s' = (school == 2) if `schoolage' & sex == `sexcode'
}

* One row per state-year holding the three subsample means
collapse (mean) school_1880 school_1880_m school_1880_f, by(fips year)
save $paneltemp/ipums1880.dta, replace

* Add 1890 rows to the IPUMS-based panel using the ICPSR 1890 file, then
* patch two series:
*   (1) the *_ipums manufacturing-labour shares for 1890 are replaced by
*       fitted values from a regression on the corresponding *_pub series
*       with fips fixed effects;
*   (2) the school-attendance shares for 1880 are replaced by the values
*       computed from the 1880 1% sample (ipums1880.dta).
use $histdir/ICPSR_1890_stct.dta, clear

* Keep level == 2 records with a non-missing fips
* NOTE(review): level == 2 is presumably the state-level record -- confirm.
keep if level == 2 & fips != .

gen pctfarm_hhhd = farmfams/families
gen pctsch_wt520 = (cswmpup + cswfpup) / (nbwm520 + fbwm520 + nbwf520 + fbwf520)

* Rescale fips so it lines up with the fips used in ipumsvars.dta
replace fips = fips/1000
keep year fips pctfarm_hhhd pctsch_wt520

append using $paneltemp/ipumsvars.dta

merge 1:1 year fips using $paneltemp/pctmfglab_pub.dta, nogenerate keepusing(pctmfglab_pub pctmfglabf_pub pctmfglabm_pub)

* Impute each 1890 *_ipums share from its *_pub counterpart
foreach stem in pctmfglab pctmfglabf pctmfglabm {
	tempvar fitted
	regress `stem'_ipums `stem'_pub i.fips
	predict `fitted'
	replace `stem'_ipums = `fitted' if year == 1890
	* The *_pub regressor is only needed for the imputation
	drop `fitted' `stem'_pub
}

* Overwrite the 1880 schooling shares with the 1%-sample versions
merge 1:1 year fips using $paneltemp/ipums1880.dta, nogenerate keepusing(school_1880*)
replace pctsch_wt520 = school_1880 if year == 1880
foreach s in m f {
	replace pctsch_wt`s'520 = school_1880_`s' if year == 1880
}
drop school_1880*

save $paneltemp/ipumsvars.dta, replace 


*************************************************
*Merge Everything Together
*************************************************

* Assemble the final fips-by-year panel of controls: start from pcturb.dta
* and merge every intermediate file on (year, fips). No _merge variable is
* kept, and unmatched rows from either side are retained (merge default).
use $paneltemp/pcturb.dta, clear

merge 1:1 year fips using $paneltemp/pctmale_wtyouth.dta, nogen
merge 1:1 year fips using $paneltemp/pctsch_pub.dta, nogen
merge 1:1 year fips using $paneltemp/pctfarm_fam.dta, nogen
merge 1:1 year fips using $paneltemp/pctmfglab_pub.dta, nogen
merge 1:1 year fips using $paneltemp/pctfor.dta, nogen
merge 1:1 year fips using $paneltemp/pctforyouth.dta, nogen
merge 1:1 year fips using $paneltemp/ginifarmsize.dta, nogen keepusing(ginifarmsize_diff ginifarmsize_con)
merge 1:1 year fips using $paneltemp/pctrr.dta, nogen
merge 1:1 year fips using $paneltemp/ipumsvars.dta, nogen
merge 1:1 year fips using $paneltemp/farmval.dta, nogen
merge 1:1 year fips using $paneltemp/dwellings.dta, nogen

* The [_n-1] / [_n+1] references below rely on this ordering
sort fips year

* Fill 1890 with the midpoint of the same unit's 1880 and 1900 values.
* The neighbouring rows are required to belong to the same fips and to be
* exactly the 1880 and 1900 observations. Without these guards, a fips
* lacking an 1880 or 1900 row would be averaged with another fips's value
* or with a non-adjacent decade; such rows are now left unchanged.
* NOTE(review): these variables are presumably missing in 1890 because no
* 1890 microdata feed them -- confirm.
foreach var of varlist pctmale_wt1530 pctmale_wt3145 logoccscore_nvrmar_2035 logoccscore_nvrmar_3045 pctanyocc_* pctclericsale_* pctteacher_*{
	replace `var' = (`var'[_n-1] + `var'[_n+1])/2 /*
		*/ if year == 1890 /*
		*/ & fips == fips[_n-1] & year[_n-1] == 1880 /*
		*/ & fips == fips[_n+1] & year[_n+1] == 1900
}

* Carry the previous row's railroad shares forward within fips for every
* year after 1880 (each replaced row feeds the next one in sort order)
replace pctrr15 = pctrr15[_n-1] if year > 1880 & fips == fips[_n-1]
replace pctrr40 = pctrr40[_n-1] if year > 1880 & fips == fips[_n-1]

* For the listed years, use the IPUMS-based series in place of the
* published one, then drop the IPUMS helper columns
replace pctmale_wtyouth = pctmale_wtyouth_ipums if (year == 1910 | year == 1920)
drop pctmale_wtyouth_ipums

replace pctforwtm = pctforwtm_ipums if (year == 1870 | year == 1910)
replace pctforwtf = pctforwtf_ipums if (year == 1870 | year == 1910)
drop pctforwtm_ipums pctforwtf_ipums

replace pctfor = pctfor_ipums if (year == 1910 | year == 1920 | year == 1930)
drop pctfor_ipums

* For the foreign-born youth shares the direction is reversed: the *_pub
* series overrides the existing values in the listed years
replace pctforyouth = pctforyouth_pub if (year==1890 | year==1900 | year==1930 | year==1940)
replace pctforyouthm = pctforyouthm_pub if (year==1890 | year==1900 | year==1930 | year==1940)
replace pctforyouthf = pctforyouthf_pub if (year==1890 | year==1900 | year==1930 | year==1940)
drop pctforyouth_pub pctforyouthm_pub pctforyouthf_pub

save $tempdir/panel_controls.dta, replace
*export excel "output/Panel Regression Analysis/panel_controls.xlsx", firstrow(variables) replace


